package day04

/**
  * Reads text files from HDFS and performs a word count on them.
  */

import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

/**
  * Spark Streaming word count over text files read from a directory.
  *
  * Note: despite its name, this job does not read from a socket. It uses
  * `StreamingContext.textFileStream` on a directory (HDFS by default) and
  * prints the per-batch word counts.
  */
object AcceptSocketData {

  /** Directory that is read when no path is given on the command line. */
  private val DefaultInputDir: String = "hdfs://node01:8020/test-sparkstreaming"

  /** Master used only when `spark.master` has not been configured externally. */
  private val DefaultMaster: String = "local[2]"

  /**
    * Starts the streaming job and blocks until it terminates.
    *
    * @param args optional; `args(0)` is the directory to read text files from.
    *             When absent or blank, [[DefaultInputDir]] is used.
    */
  def main(args: Array[String]): Unit = {
    val inputDir: String =
      args.headOption.map(_.trim).filter(_.nonEmpty).getOrElse(DefaultInputDir)

    // setIfMissing keeps "local[2]" as the default for local runs while still
    // honouring a master supplied externally (e.g. spark-submit --master);
    // an unconditional setMaster would override it.
    val sparkConf: SparkConf = new SparkConf()
      .setAppName("AcceptSocketData")
      .setIfMissing("spark.master", DefaultMaster)
    val sc: SparkContext = new SparkContext(sparkConf)
    sc.setLogLevel("WARN")

    // Batch interval of 5 seconds.
    val ssc: StreamingContext = new StreamingContext(sc, Seconds(5))

    // Read text files from the input directory as a stream of lines.
    val data: DStream[String] = ssc.textFileStream(inputDir)

    // Word count. Split on any run of whitespace and drop empty tokens so that
    // blank lines, leading spaces and repeated spaces are not counted as the
    // "word" "".
    val wc: DStream[(String, Int)] = data
      .flatMap(line => line.split("\\s+"))
      .filter(word => word.nonEmpty)
      .map(word => (word, 1))
      .reduceByKey(_ + _)
    wc.print()

    // Start the streaming computation and wait for it to finish.
    ssc.start()
    ssc.awaitTermination()
  }
}
